home *** CD-ROM | disk | FTP | other *** search
- _PARALLEL DSP FOR DESIGNING ADAPTIVE FILTERS_
- by Daniel Chen
-
-
-
- [LISTING ONE]
-
- /******* PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #1 *******/
- /* Processor #1 handles taps 0..N1-1, reads x and d from the A/D converter,
- * forms y and e, and hands its oldest sample on to processor #2.
- * x[] is the delay line, w[] the weights, xin the newest input sample. */
- /* Initialization */
- xptr = &x[0];
- wptr = &w[0];
-
- for (i=0;i<N1;i++){
- *xptr++ = 0.0;
- *wptr++ = 0.0;
- }
- /* Everything below is repeated once per input sample. */
- /* N1-1
- * Compute y1 = SUM w[i] * x[i]
- * i=0
- */
- xptr = &x[0];
- wptr = &w[0];
- input(xin); /* input x from A/D converter */
- *xptr = xin;
- input (d); /* input d from A/D converter */
-
- y1 = 0.0; /* restart the partial sum for this sample */
- for (i=0;i<N1;i++)
- y1 += *xptr++ * *wptr++;
- /* Compute y = y1 + y2 + y3 + y4 */
- receive(y2,y3,y4); /* receive y2, y3, y4 from processor 2, 3, 4 */
- y = y1 + y2 + y3 + y4;
- /* Compute error signal e */
- e = d - y;
- output(y); /* output y to D/A converter */
- pass(e); /* pass e to processor 2, 3, 4 */
- /* Update filter weights w[] */
- xptr = &x[N1-1];
- wptr = &w[N1-1];
- pass (*xptr); /* pass x(n-N1) to processor #2 */
- for (i=N1-1;i>0;i--){
- *wptr-- += mu * e * *xptr--; /* note the dereference of xptr */
- *(xptr+1) = *xptr; /* delayed tap is implemented in circular buffer */
- }
- *wptr += mu * e * *xptr; /* w[0]: no shift needed, x[0] takes the next input */
-
-
- [LISTING TWO]
-
- /******* PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #2 *******/
- /* Processor #2 handles the N2 taps that follow processor #1's taps.
- * x[] is the local delay line, w[] the local weights, xin the sample
- * received from processor #1. */
- /* Initialization */
- xptr = &x[0];
- wptr = &w[0];
- for (i=0;i<N2;i++){
- *xptr++ = 0.0;
- *wptr++ = 0.0;
- }
- /* Everything below is repeated once per input sample. */
- /* N2-1
- * Compute y2 = SUM w[i] * x[i]
- * i=0
- */
- xptr = &x[0];
- wptr = &w[0];
- receive(xin); /* receive x(n-N1) from processor #1 */
- *xptr = xin;
- y2 = 0.0; /* restart the partial sum for this sample */
- for (i=0;i<N2;i++)
- y2 += *xptr++ * *wptr++;
- /* pass y2 and receive e */
- pass(y2); /* pass y2 to processor #1 */
- receive(e); /* receive e(n) from processor #1 */
- /* Update filter weights w[] */
- xptr = &x[N2-1];
- wptr = &w[N2-1];
- pass (*xptr); /* pass x(n-N1-N2) to processor #3 */
- for (i=N2-1;i>0;i--){
- *wptr-- += mu * e * *xptr--; /* note the dereference of xptr */
- *(xptr+1) = *xptr; /* delayed tap is implemented in circular buffer */
- }
- *wptr += mu * e * *xptr; /* w[0]: no shift needed, x[0] takes the next input */
-
-
- [LISTING THREE]
-
- /****** PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #3 ******/
- /* Processor #3 handles the N3 taps that follow processor #2's taps.
- * x[] is the local delay line, w[] the local weights, xin the sample
- * received from processor #2. */
- /* Initialization */
- xptr = &x[0];
- wptr = &w[0];
-
- for (i=0;i<N3;i++){
- *xptr++ = 0.0;
- *wptr++ = 0.0;
- }
- /* Everything below is repeated once per input sample. */
- /* N3-1
- * Compute y3 = SUM w[i] * x[i]
- * i=0
- */
- xptr = &x[0];
- wptr = &w[0];
- receive(xin); /* receive x(n-N1-N2) from processor #2 */
- *xptr = xin;
-
- y3 = 0.0; /* restart the partial sum for this sample */
- for (i=0;i<N3;i++)
- y3 += *xptr++ * *wptr++;
- /* pass y3 and receive e */
- pass(y3); /* pass y3 to processor #1 */
- receive(e); /* receive e(n) from processor #1 */
-
- /* Update filter weights w[] */
- xptr = &x[N3-1];
- wptr = &w[N3-1];
- pass (*xptr); /* pass x(n-N1-N2-N3) to processor #4 */
- for (i=N3-1;i>0;i--){
- *wptr-- += mu * e * *xptr--; /* note the dereference of xptr */
- *(xptr+1) = *xptr; /* delayed tap is implemented
- in circular buffer */
- }
- *wptr += mu * e * *xptr; /* w[0]: no shift needed, x[0] takes the next input */
-
-
- [LISTING FOUR]
-
- /****** PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #4 ******/
- /* Processor #4 handles the last N4 taps of the cascade; it has no
- * downstream processor, so nothing is passed on after the update.
- * x[] is the local delay line, w[] the local weights, xin the sample
- * received from processor #3. */
- /* Initialization */
- xptr = &x[0];
- wptr = &w[0];
-
- for (i=0;i<N4;i++){
- *xptr++ = 0.0;
- *wptr++ = 0.0;
- }
- /* Everything below is repeated once per input sample. */
- /* N4-1
- * Compute y4 = SUM w[i] * x[i]
- * i=0
- */
- xptr = &x[0];
- wptr = &w[0];
- receive(xin); /* receive x(n-N1-N2-N3) from processor #3 */
- *xptr = xin;
-
- y4 = 0.0; /* restart the partial sum for this sample */
- for (i=0;i<N4;i++)
- y4 += *xptr++ * *wptr++;
- /* pass y4 and receive e */
- pass(y4); /* pass y4 to processor #1 */
- receive(e); /* receive e(n) from processor #1 */
-
- /* Update filter weights w[] */
- xptr = &x[N4-1];
- wptr = &w[N4-1];
- for (i=N4-1;i>0;i--){ /* bounded by N4 (this processor's order), not N3 */
- *wptr-- += mu * e * *xptr--; /* note the dereference of xptr */
- *(xptr+1) = *xptr; /* delayed tap is implemented
- in circular buffer */
- }
- *wptr += mu * e * *xptr; /* w[0]: no shift needed, x[0] takes the next input */
-
-
- [LISTING FIVE]
-
- **********************************************************************
- * CONST.H - This file sets up the constants for the Cascade TMS320C40
- * Adaptive Filter programs: LMS1.ASM LMS2.ASM LMS3.ASM LMS4.ASM
- * C40_a_b is the address processor #a uses to reach processor #b. Each
- * LMSn program references only its own C40_n_* symbols, which is why
- * the same numeric address appears under several names.
- * The LMS programs use each address with offset -1 to enable the port,
- * offset 0 to read from it and offset +1 to write to it.
- * NOTE(review): N1..N4 are not defined in this file - presumably they
- * must be replaced by the numeric tap counts before assembly; confirm.
- **********************************************************************
- order1 .set N1 ; filter order for #1 C40
- order2 .set N2 ; filter order for #2 C40
- order3 .set N3 ; filter order for #3 C40
- order4 .set N4 ; filter order for #4 C40
- mu .set 0.01 ; step size (LMS1 stores it with ".float mu")
- io_port .set 0100081h ; data I/O comm port addr for d, x, & y (LMS1 only)
- C40_1_2 .set 0100041h ; comm port address from #1 to #2 C40
- C40_1_3 .set 0100051h ; comm port address from #1 to #3 C40
- C40_1_4 .set 0100061h ; comm port address from #1 to #4 C40
- C40_2_1 .set 0100071h ; comm port address from #2 to #1 C40
- C40_2_3 .set 0100061h ; comm port address from #2 to #3 C40
- C40_2_4 .set 0100051h ; comm port address from #2 to #4 C40
- C40_3_1 .set 0100081h ; comm port address from #3 to #1 C40
- C40_3_2 .set 0100071h ; comm port address from #3 to #2 C40
- C40_3_4 .set 0100061h ; comm port address from #3 to #4 C40
- C40_4_1 .set 0100071h ; comm port address from #4 to #1 C40
- C40_4_2 .set 0100081h ; comm port address from #4 to #2 C40
- C40_4_3 .set 0100091h ; comm port address from #4 to #3 C40
-
-
-
- [LISTING SIX]
-
- ******************************************************************
- * LMS1 : Cascade TMS320C40 adaptive filter #1 Using Transversal
- * Structure and LMS Algorithm, Looped Code
- * Configuration:
- * d(n) --------------------------+
- * |
- * e(n) |+
- * +-----<-----(SUM)
- * | |-
- * --------+-------- |
- * x(n) ----|Adaptive Filter|-----+--------> y(n)
- * -----------------
- * +--------<-------+-------<--------+-------<--------+
- * | |y2(n) |y3(n) |y4(n)
- * y(n)<-+ | | | |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
- * x(n)---->| |----->| |----->| |----->| |
- * +->| # 1 | | # 2 | | # 3 | | # 4 |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * d(n)--+ | | | |
- * e(n)| | | |
- * +-------->-------+------->--------+------->--------+
- * where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
- * Algorithm for processor #1:
- * N1-1
- * y1(n) = SUM w(k)*x(n-k) k=0,1,2,...,N1-1
- * k=0
- * y(n) = y1(n) + y2(n) + y3(n) + y4(n)
- * e(n) = d(n) - y(n)
- * w(k) = w(k) + u*e(n)*x(n-k) k=0,1,2,...,N1-1
- * where filter order N = N1 + N2 + N3 + N4 and u is the step size mu,
- **********************************************************************
- .include "const.h" ; include the constant definition file
- .sect "vector"
- reset .word begin
- ; Initialize pointers and arrays
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; for (i=0;i<N1;i++){
- ; *xptr++ = 0.0;
- ; *wptr++ = 0.0;
- ; }
- ; Register roles set up here and kept for the whole program:
- ; AR0 -> x[] delay line, AR1 -> w[] weights, BK = N1 (circular block size)
- ; AR4 -> data I/O port, AR5/AR6/AR7 -> comm ports to #2/#3/#4 C40
- .text
- begin .set $
- LDP @io_addr ; set data page
- LDI 0,R2 ; R2 = 0
- LDF 0.0,R1 ; R1 = 0.0
- LDI @io_addr,AR4 ; set pointer for data I/O
- LDI @C40addr2,AR5 ; set pointer for #2 C40 comm port
- LDI @C40addr3,AR6 ; set pointer for #3 C40 comm port
- LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
- LDI @xn_addr,AR0 ; set pointer for x[]
- LDI @wn_addr,AR1 ; set pointer for w[]
- LDI order1,BK ; set up circular buffer BEFORE the first '%' access,
- ; so the clear loop below wraps AR0/AR1 back to x[0]/w[0]
- STI R2,*-AR5(1) ; enable #2 C40 comm port
- STI R2,*-AR6(1) ; enable #3 C40 comm port
- STI R2,*-AR7(1) ; enable #4 C40 comm port
- STF R1,*+AR5(1) ; start #2 C40 (sends 0.0 as its first input sample)
- RPTS order1-1 ; repeat the parallel store N1 times
- STF R1,*AR0++(1)% ; x[] = 0.
- || STF R1,*AR1++(1)% ; w[] = 0.
- input:
- ; Per-sample loop entry; the BD at the end of the weight update returns here.
- ; Compute filter output y1(n)
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; input(x); /* input x from A/D converter */
- ; input (d); /* input d from A/D converter */
- ; *xptr = x;
- ; for (i=0;i<N1;i++)
- ; y1 += *xptr++ * *wptr++;
- LDI order1-2,RC ; repeat count for the block ending at 'filter'
- RPTBD filter ; delayed repeat: the next three instructions run first
- LDF *AR4,R6 ; input x(n)
- LDF *AR4,R7 ; input d(n)
- || STF R6,*AR0 ; insert x(n) to buffer
- MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; first product
- || SUBF3 R2,R2,R2 ; R2 = 0.0
- filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product
- || ADDF3 R1,R2,R2 ; y1(n) = w[].x[] (adds the previous product)
- ADDF R1,R2 ; include last result
- ; compute y(n) signals
- ; receive(y2,y3,y4); /* receive y2, y3, y4 from processor 2, 3, 4 */
- ; y = y1 + y2 + y3 + y4;
- ADDF *AR5,R2 ; add y2(n)
- ADDF *AR6,R2 ; add y3(n)
- ADDF *AR7,R2 ; add y4(n)
- ; Compute error signal e(n)
- ; e = d - y;
- ; pass(e); /* pass e to processor 2, 3, 4 */
- SUBF R2,R7 ; e(n) = d(n) - y(n)
- MPYF @u,R7 ; R7 = err = e(n) * u
- ; Output y(n) signal and e(n)
- ; output(y); /* output y to D/A converter */
- ; pass(e); /* pass e to processor 2, 3, 4 */
- ; Note: the value sent to #2, #3 and #4 is R7 = u * e(n), already scaled.
- STF R7,*+AR5(1) ; send out err = u * e(n) to #2 C40
- || STF R7,*+AR6(1) ; send out err = u * e(n) to #3 C40
- STF R2,*+AR4(1) ; send out y(n)
- || STF R7,*+AR7(1) ; send out err = u * e(n) to #4 C40
- ; Update weights w(n)
- ; xptr = &x[N1-1];
- ; wptr = &w[N1-1];
- ; pass (*xptr); /* pass x(n-N1) to processor #2 */
- ; for (i=N1;i>0;i--){
- ; *wptr-- += mu * e *xptr--;
- ; *(xptr+1) = *xptr; /* delayed tap is implemented
- ; in circular buffer */
- ; }
- ; The first update is started in the RPTBD delay slots and the last one is
- ; finished in the BD delay slots.
- LDI order1-3,RC ; initialize repeat counter
- RPTBD weight ; do i = 0, N-3
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
- ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
- NOP
-
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
- || STF R2,*AR1++(1)% ; update wi(n+1)
- weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
- LDF *AR0,R6 ; R6 = sample AR0 now points at, forwarded to #2 below
- || STF R2,*AR1++(1)% ; update wi(n+1)
- BD input ; delay branch
- MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1); AR0 is not advanced here
- || STF R6,*+AR5(1) ; shift x(n-N) to #2 C40
- ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
- STF R2,*AR1++(1)% ; update last w
-
- ; Define buffers and the constant table read through the data page
- ; NOTE(review): x[] and w[] are accessed with circular addressing; confirm that
- ; the linker places "buffer" and "coeffs" on suitably aligned addresses.
- xn .usect "buffer",order1 ; x[] delay line, order1 words
- wn .usect "coeffs",order1 ; w[] weights, order1 words
- .data
- io_addr .word io_port ; data I/O port address, loaded into AR4
- C40addr2 .word C40_1_2 ; comm port to #2 C40, loaded into AR5
- C40addr3 .word C40_1_3 ; comm port to #3 C40, loaded into AR6
- C40addr4 .word C40_1_4 ; comm port to #4 C40, loaded into AR7
- xn_addr .word xn ; address of x[], loaded into AR0
- wn_addr .word wn ; address of w[], loaded into AR1
- u .float mu ; step size, multiplied into e(n)
- .end
-
-
-
- [LISTING SEVEN]
-
- ******************************************************************
- * LMS2 : Cascade TMS320C40 adaptive filter #2 Using Transversal
- * Structure and LMS Algorithm, Looped Code
- * Configuration:
- * d(n) --------------------------+
- * |
- * e(n) |+
- * +-----<-----(SUM)
- * | |-
- * --------+-------- |
- * x(n) ----|Adaptive Filter|-----+--------> y(n)
- * -----------------
- * +--------<-------+-------<--------+-------<--------+
- * | |y2(n) |y3(n) |y4(n)
- * y(n)<-+ | | | |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
- * x(n)---->| |----->| |----->| |----->| |
- * +->| # 1 | | # 2 | | # 3 | | # 4 |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * d(n)--+ | | | |
- * e(n)| | | |
- * +-------->-------+------->--------+------->--------+
- * where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
- * Algorithm for processor #2:
- * N2-1
- * y2(n) = SUM w(N1+k)*x(n-N1-k) k=0,1,2,...,N2-1
- * k=0
- * w(N1+k) = w(N1+k) + u*e(n)*x(n-N1-k) k=0,1,2,...,N2-1
- * where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
- **********************************************************************
- .include "const.h" ; include the constant definition file
- .sect "vector"
- reset .word begin
- ; Initialize pointers and arrays
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; for (i=0;i<N2;i++){
- ; *xptr++ = 0.0;
- ; *wptr++ = 0.0;
- ; }
- ; Register roles set up here and kept for the whole program:
- ; AR0 -> x[] delay line, AR1 -> w[] weights, BK = N2 (circular block size)
- ; AR5/AR6/AR7 -> comm ports to #1/#3/#4 C40
- .text
- begin .set $
- LDP @C40addr1 ; set data page
- LDI 0,R2 ; R2 = 0
- LDF 0.0,R1 ; R1 = 0.0
- LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
- LDI @C40addr3,AR6 ; set pointer for #3 C40 comm port
- LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
- LDI @xn_addr,AR0 ; set pointer for x[]
- LDI @wn_addr,AR1 ; set pointer for w[]
- LDI order2,BK ; set up circular buffer BEFORE the first '%' access,
- ; so the clear loop below wraps AR0/AR1 back to x[0]/w[0]
- STI R2,*-AR6(1) ; enable #3 C40 comm port
- STI R2,*-AR5(1) ; enable #1 C40 comm port
- STI R2,*-AR7(1) ; enable #4 C40 comm port
- STF R1,*+AR6(1) ; start #3 C40 (sends 0.0 as its first input sample)
- RPTS order2-1 ; repeat the parallel store N2 times
- STF R1,*AR0++(1)% ; x[] = 0.
- || STF R1,*AR1++(1)% ; w[] = 0.
- input:
- ; Per-sample loop entry; the BD at the end of the weight update returns here.
- ; Compute filter output y2(n)
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; receive(x); /* receive x(n-N1) from processor #1 */
- ; *xptr = x;
- ; for (i=0;i<N2;i++)
- ; y2 += *xptr++ * *wptr++;
- LDI order2-2,RC ; repeat count for the block ending at 'filter'
- RPTBD filter ; delayed repeat: the next three instructions run first
- LDF *AR5,R6 ; input x(n-N1) from #1 C40
- STF R6,*AR0 ; insert the new sample into the buffer
- MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; first product
- || SUBF3 R2,R2,R2 ; R2 = 0.0
- filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product
- || ADDF3 R1,R2,R2 ; y2(n) = w[].x[] (adds the previous product)
- ADDF R1,R2 ; include last result
- ; Output y2(n) signals
- ; pass(y2); /* pass y2 to processor #1 */
- STF R2,*+AR5(1) ; send y2(n) to #1 C40
- ; Input error signal e(n)
- ; receive(e); /* receive e(n) from processor #1 */
- LDF *AR5,R7 ; load err from #1 C40 (LMS1 sends u * e(n))
- ; Update weights w(n)
- ; xptr = &x[N2-1];
- ; wptr = &w[N2-1];
- ; pass (*xptr); /* pass x(n-N1-N2) to processor #3 */
- ; for (i=N2;i>0;i--){
- ; *wptr-- += mu * e *xptr--;
- ; *(xptr+1) = *xptr; /* delayed tap is implemented
- ; in circular buffer */
- ; }
- ; The first update is started in the RPTBD delay slots and the last one is
- ; finished in the BD delay slots.
- LDI order2-3,RC ; initialize repeat counter
- RPTBD weight ; do i = 0, N2-3
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
- ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
- NOP
-
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
- || STF R2,*AR1++(1)% ; update wi(n+1)
- weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
-
- LDF *AR0,R6 ; R6 = sample AR0 now points at, forwarded to #3 below
- || STF R2,*AR1++(1)% ; update wi(n+1)
- BD input ; delay branch
- MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1); AR0 is not advanced here
- || STF R6,*+AR6(1) ; shift x(n-N) to #3 C40
- ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
- STF R2,*AR1++(1)% ; update last w
-
- ; Define buffers and the constant table read through the data page
- ; NOTE(review): x[] and w[] are accessed with circular addressing; confirm that
- ; the linker places "buffer" and "coeffs" on suitably aligned addresses.
- xn .usect "buffer",order2 ; x[] delay line, order2 words
- wn .usect "coeffs",order2 ; w[] weights, order2 words
- .data
- C40addr1 .word C40_2_1 ; comm port to #1 C40, loaded into AR5
- C40addr3 .word C40_2_3 ; comm port to #3 C40, loaded into AR6
- C40addr4 .word C40_2_4 ; comm port to #4 C40, loaded into AR7
- xn_addr .word xn ; address of x[], loaded into AR0
- wn_addr .word wn ; address of w[], loaded into AR1
- .end
-
-
- [LISTING EIGHT]
-
- ******************************************************************
- * LMS3 : Cascade TMS320C40 adaptive filter #3 Using Transversal
- * Structure and LMS Algorithm, Looped Code
- * Configuration:
- * d(n) --------------------------+
- * |
- * e(n) |+
- * +-----<-----(SUM)
- * | |-
- * --------+-------- |
- * x(n) ----|Adaptive Filter|-----+--------> y(n)
- * -----------------
- * +--------<-------+-------<--------+-------<--------+
- * | |y2(n) |y3(n) |y4(n)
- * y(n)<-+ | | | |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
- * x(n)---->| |----->| |----->| |----->| |
- * +->| # 1 | | # 2 | | # 3 | | # 4 |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * d(n)--+ | | | |
- * e(n)| | | |
- * +-------->-------+------->--------+------->--------+
- * where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
- * Algorithm for processor #3:
- * N3-1
- * y3(n) = SUM w(N1+N2+k)*x(n-N1-N2-k) k=0,1,2,...,N3-1
- * k=0
- * w(N1+N2+k) = w(N1+N2+k) + u*e(n)*x(n-N1-N2-k) k=0,1,2,...,N3-1
- * where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
- **********************************************************************
- .include "const.h" ; include the constant definition file
- .sect "vector"
- reset .word begin
- ; Initialize pointers and arrays
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; for (i=0;i<N3;i++){
- ; *xptr++ = 0.0;
- ; *wptr++ = 0.0;
- ; }
- ; Register roles set up here and kept for the whole program:
- ; AR0 -> x[] delay line, AR1 -> w[] weights, BK = N3 (circular block size)
- ; AR5/AR6/AR7 -> comm ports to #1/#2/#4 C40
- .text
- begin .set $
- LDP @C40addr1 ; set data page
- LDI 0,R2 ; R2 = 0
- LDF 0.0,R1 ; R1 = 0.0
- LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
- LDI @C40addr2,AR6 ; set pointer for #2 C40 comm port
- LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
- LDI @xn_addr,AR0 ; set pointer for x[]
- LDI @wn_addr,AR1 ; set pointer for w[]
- LDI order3,BK ; set up circular buffer BEFORE the first '%' access,
- ; so the clear loop below wraps AR0/AR1 back to x[0]/w[0]
- STI R2,*-AR7(1) ; enable #4 C40 comm port
- STI R2,*-AR6(1) ; enable #2 C40 comm port
- STI R2,*-AR5(1) ; enable #1 C40 comm port
- STF R1,*+AR7(1) ; start #4 C40 (sends 0.0 as its first input sample)
- RPTS order3-1 ; repeat the parallel store N3 times
- STF R1,*AR0++(1)% ; x[] = 0.
- || STF R1,*AR1++(1)% ; w[] = 0.
- input:
- ; Per-sample loop entry; the BD at the end of the weight update returns here.
- ; Compute filter output y3(n)
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; receive(x); /* receive x(n-N1-N2) from processor #2 */
- ; *xptr = x;
- ; for (i=0;i<N3;i++)
- ; y3 += *xptr++ * *wptr++;
- LDI order3-2,RC ; repeat count for the block ending at 'filter'
- RPTBD filter ; delayed repeat: the next three instructions run first
- LDF *AR6,R6 ; input x(n-N1-N2) from #2 C40
- STF R6,*AR0 ; insert the new sample into the buffer
- MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; first product
- || SUBF3 R2,R2,R2 ; R2 = 0.0
- filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product
- || ADDF3 R1,R2,R2 ; y3(n) = w[].x[] (adds the previous product)
- ADDF R1,R2 ; include last result
- ; Output y3(n) signals
- ; pass(y3); /* pass y3 to processor #1 */
- STF R2,*+AR5(1) ; send y3(n) to #1 C40
- ; Input error signal e(n)
- ; receive(e); /* receive e(n) from processor #1 */
- LDF *AR5,R7 ; load err from #1 C40 (LMS1 sends u * e(n))
- ; Update weights w(n)
- ; xptr = &x[N3-1];
- ; wptr = &w[N3-1];
- ; pass (*xptr); /* pass x(n-N1-N2-N3) to processor #4 */
- ; for (i=N3;i>0;i--){
- ; *wptr-- += mu * e *xptr--;
- ; *(xptr+1) = *xptr; /* delayed tap is implemented
- ; in circular buffer */
- ; }
- ; The first update is started in the RPTBD delay slots and the last one is
- ; finished in the BD delay slots.
- LDI order3-3,RC ; initialize repeat counter
- RPTBD weight ; do i = 0, N3-3
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
- ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
- NOP
-
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
- || STF R2,*AR1++(1)% ; update wi(n+1)
- weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
-
- LDF *AR0,R6 ; R6 = sample AR0 now points at, forwarded to #4 below
- || STF R2,*AR1++(1)% ; update wi(n+1)
- BD input ; delay branch
- MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1); AR0 is not advanced here
- || STF R6,*+AR7(1) ; shift x(n-N) to #4 C40
- ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
- STF R2,*AR1++(1)% ; update last w
-
- ; Define buffers and the constant table read through the data page
- ; NOTE(review): x[] and w[] are accessed with circular addressing; confirm that
- ; the linker places "buffer" and "coeffs" on suitably aligned addresses.
- xn .usect "buffer",order3 ; x[] delay line, order3 words
- wn .usect "coeffs",order3 ; w[] weights, order3 words
- .data
- C40addr1 .word C40_3_1 ; comm port to #1 C40, loaded into AR5
- C40addr2 .word C40_3_2 ; comm port to #2 C40, loaded into AR6
- C40addr4 .word C40_3_4 ; comm port to #4 C40, loaded into AR7
- xn_addr .word xn ; address of x[], loaded into AR0
- wn_addr .word wn ; address of w[], loaded into AR1
- .end
-
-
- [LISTING NINE]
-
- ******************************************************************
- * LMS4 : Cascade TMS320C40 adaptive filter #4 Using Transversal
- * Structure and LMS Algorithm, Looped Code
- * Configuration:
- * d(n) --------------------------+
- * |
- * e(n) |+
- * +-----<-----(SUM)
- * | |-
- * --------+-------- |
- * x(n) ----|Adaptive Filter|-----+--------> y(n)
- * -----------------
- * +--------<-------+-------<--------+-------<--------+
- * | |y2(n) |y3(n) |y4(n)
- * y(n)<-+ | | | |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
- * x(n)---->| |----->| |----->| |----->| |
- * +->| # 1 | | # 2 | | # 3 | | # 4 |
- * | +----+----+ +----+----+ +----+----+ +----+----+
- * d(n)--+ | | | |
- * e(n)| | | |
- * +-------->-------+------->--------+------->--------+
- * where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
- * Algorithm for processor #4:
- * N4-1
- * y4(n) = SUM w(N1+N2+N3+k)*x(n-N1-N2-N3-k) k=0,1,2,...,N4-1
- * k=0
- * w(N1+N2+N3+k) = w(N1+N2+N3+k) + u*e(n)*x(n-N1-N2-N3-k) k=0,1,2,...,N4-1
- * where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
- **********************************************************************
- .include "const.h" ; include the constant definition file
- .sect "vector"
- reset .word begin
- ; Initialize pointers and arrays
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; for (i=0;i<N4;i++){
- ; *xptr++ = 0.0;
- ; *wptr++ = 0.0;
- ; }
- ; Register roles set up here and kept for the whole program:
- ; AR0 -> x[] delay line, AR1 -> w[] weights, BK = N4 (circular block size)
- ; AR5/AR6/AR7 -> comm ports to #1/#2/#3 C40
- ; #4 is the last stage, so no downstream processor is started here.
- .text
- begin .set $
- LDP @C40addr1 ; set data page
- LDI 0,R2 ; R2 = 0
- LDF 0.0,R1 ; R1 = 0.0
- LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
- LDI @C40addr2,AR6 ; set pointer for #2 C40 comm port
- LDI @C40addr3,AR7 ; set pointer for #3 C40 comm port
- LDI @xn_addr,AR0 ; set pointer for x[]
- LDI @wn_addr,AR1 ; set pointer for w[]
- LDI order4,BK ; set up circular buffer BEFORE the first '%' access,
- ; so the clear loop below wraps AR0/AR1 back to x[0]/w[0]
- STI R2,*-AR5(1) ; enable #1 C40 comm port
- STI R2,*-AR6(1) ; enable #2 C40 comm port
- STI R2,*-AR7(1) ; enable #3 C40 comm port
- RPTS order4-1 ; repeat the parallel store N4 times
- STF R1,*AR0++(1)% ; x[] = 0.
- || STF R1,*AR1++(1)% ; w[] = 0.
- input:
- ; Per-sample loop entry; the BD at the end of the weight update returns here.
- ; Compute filter output y4(n)
- ; xptr = &x[0];
- ; wptr = &w[0];
- ; receive(x); /* receive x(n-N1-N2-N3) from processor #3 */
- ; *xptr = x;
- ; for (i=0;i<N4;i++)
- ; y4 += *xptr++ * *wptr++;
- LDI order4-2,RC ; repeat count for the block ending at 'filter'
- RPTBD filter ; delayed repeat: the next three instructions run first
- LDF *AR7,R6 ; input x(n-N1-N2-N3) from #3 C40
- STF R6,*AR0 ; insert the new sample into the buffer
- MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; first product
- || SUBF3 R2,R2,R2 ; R2 = 0.0
- filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product
- || ADDF3 R1,R2,R2 ; y4(n) = w[].x[] (adds the previous product)
- ADDF R1,R2 ; include last result
- ; Output y4(n) signals
- ; pass(y4); /* pass y4 to processor #1 */
- STF R2,*+AR5(1) ; send y4(n) to #1 C40
- ; Input error signal e(n)
- ; receive(e); /* receive e(n) from processor #1 */
- LDF *AR5,R7 ; load err from #1 C40 (LMS1 sends u * e(n))
- ; Update weights w(n)
- ; xptr = &x[N4-1];
- ; wptr = &w[N4-1];
- ; for (i=N4;i>0;i--){
- ; *wptr-- += mu * e *xptr--;
- ; *(xptr+1) = *xptr; /* delayed tap is implemented
- ; in circular buffer */
- ; }
- ; The first update is started in the RPTBD delay slots and the last one is
- ; finished in the BD delay slots. Unlike LMS1-LMS3, no sample is forwarded.
- LDI order4-3,RC ; initialize repeat counter
- RPTBD weight ; do i = 0, N4-3
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
- ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
- NOP
-
- MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
- || STF R2,*AR1++(1)% ; update wi(n+1)
- weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
-
- BD input ; delay branch
- MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1); AR0 is not advanced here
- || STF R2,*AR1++(1)% ; update wi(n+1)
- ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
- STF R2,*AR1++(1)% ; update last w
-
- ; Define buffers and the constant table read through the data page
- ; NOTE(review): x[] and w[] are accessed with circular addressing; confirm that
- ; the linker places "buffer" and "coeffs" on suitably aligned addresses.
- xn .usect "buffer",order4 ; x[] delay line, order4 words
- wn .usect "coeffs",order4 ; w[] weights, order4 words
- .data
- C40addr1 .word C40_4_1 ; comm port to #1 C40, loaded into AR5
- C40addr2 .word C40_4_2 ; comm port to #2 C40, loaded into AR6
- C40addr3 .word C40_4_3 ; comm port to #3 C40, loaded into AR7
- xn_addr .word xn ; address of x[], loaded into AR0
- wn_addr .word wn ; address of w[], loaded into AR1
- .end
-